
# =============================================================
# File: App_A_robustness.R
# Purpose: Produces results from Appendix Section A: Robustness Tests
# Paper: Foreignness as an Asset: European Carbon Regulation and the Relocation Threat among Multinational Firms
# Author: Patrick Bayer, patrick.bayer@strath.ac.uk
# Date: 5 October 2022
#
# Data: ./data.csv
#
# Technical disclaimer:
# All analyses in R version 4.1.2 (2021-11-01)
# RStudio 2022.07.1 Build 554 ("Spotted Wakerobin" Release (7872775e, 2022-07-22) for Windows)
# Windows 10 Enterprise, 64-bit
# 12th Gen Intel(R) Core(TM) i7-1270P 2.20 GHz with 32GB RAM
# =============================================================

library(lmtest)
library(sandwich)
library(MASS)
library(MatchIt)
library(cem)
library(cobalt)
library(cowplot)
library(ggpubr)
library(marginaleffects)
library(margins)
library(AER)
library(tidyverse)

# Load data
df <- read_csv(file = "./data.csv")

# Trim sample down to true within firm sample
# (1) Limit sample to European MNCs: no allocation data for domestically-owned operations for non-EU MNCs
# (2) Exclude MNCs with only foreign-owned operations as there is no within MNC variation

df$sample <- ifelse(df$mnc.eu==1 & df$foreign.share<1,1,0)
df <- df[df$sample==1,]
df$foreign <- as.factor(df$foreign) # set variable as categorical


# =============================================================
# APPENDIX A: Robustness Tests
# =============================================================

# I implement two robustness tests:
#
# (1) I re-estimate the main models when dropping (country-by-country) all firms from a given country from the data
# (2) I implement jackknife re-sampling

# =============================================================
# (1) Country Robustness
# =============================================================

# Model (1)

# Re-estimate full sample model
m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df)
me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0",)["foreign1",])-1)*100 # robust SEs
main <- c(me,as.numeric(ci[1]),as.numeric(ci[2]), nobs(m))
main

# Set up estimation loop
m.dt <- find_data(m)
ctry.list <- sort(unique(m.dt$iso2))
OUT <- list()

for(i in (1:length(ctry.list))){
  
  # Identify firms that operate in a given country
  firm.list <- unique(m.dt$BVD[m.dt$iso2==ctry.list[i]])

  # Re-estimate model without firms from this country in the data
  m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=m.dt[!(m.dt$BVD %in% firm.list),])
  me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
  ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs
  n <- nobs(m)
  est.out <- c(ctry.list[i],me,ci[1],ci[2],n)
  print(paste(i, "of", length(ctry.list))) # simple counter
  OUT[[i]] <- est.out
}

t <- bind_rows(OUT)
n <- rbind(c("full",main),t)

colnames(n) <- c("ctry", "estimate", "ci_low", "ci_up", "obs")
n$estimate <- as.numeric(n$estimate)
n$ci_low <- as.numeric(n$ci_low)
n$ci_up <- as.numeric(n$ci_up)
n$obs <- as.numeric(n$obs)
print(n, n=nrow(n))
n$id.ctry <- seq(1,dim(n)[1],1)

p1 <- ggplot(n, aes(x=factor(id.ctry), y=estimate)) +
      labs(x="Samples", y="Estimate", title="Panel A: Model (1)") +
      geom_errorbar(aes(ymin=ci_low, ymax=ci_up), width=0, lwd=1) +
      geom_hline(yintercept=0, lty=2) + 
      scale_x_discrete(breaks=n$id.ctry,
                   labels=c("full",ctry.list)) +
      geom_point(aes(y=estimate), shape=19, size=2) +
      theme_half_open(12)
p1


# =============================================================
# Model (2)
# =============================================================

# Re-estimate full sample model
m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df)
me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0",)["foreign1",])-1)*100 # robust SEs
main <- c(me,as.numeric(ci[1]),as.numeric(ci[2]), nobs(m))
main

# Set up estimation loop
m.dt <- find_data(m)
ctry.list <- sort(unique(m.dt$iso2))
OUT <- list()


for(i in (1:length(ctry.list))){
  
  # Identify firms that operate in a given country
  firm.list <- unique(m.dt$BVD[m.dt$iso2==ctry.list[i]])
  
  # Re-estimate model without firms from this country in the data
  m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=m.dt[!(m.dt$BVD %in% firm.list),])
  me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
  ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs
  n <- nobs(m)
  est.out <- c(ctry.list[i],me,ci[1],ci[2],n)
  print(paste(i, "of", length(ctry.list))) # simple counter
  OUT[[i]] <- est.out
}

t <- bind_rows(OUT)
n <- rbind(c("full",main),t)

colnames(n) <- c("ctry", "estimate", "ci_low", "ci_up", "obs")
n$estimate <- as.numeric(n$estimate)
n$ci_low <- as.numeric(n$ci_low)
n$ci_up <- as.numeric(n$ci_up)
n$obs <- as.numeric(n$obs)
print(n, n=nrow(n))
n$id.ctry <- seq(1,dim(n)[1],1)


p2 <- ggplot(n, aes(x=factor(id.ctry), y=estimate)) +
  labs(x="Samples", y="Estimate", title="Panel B: Model (2)") +
  geom_errorbar(aes(ymin=ci_low, ymax=ci_up), width=0, lwd=1) +
  geom_hline(yintercept=0, lty=2) + 
  scale_x_discrete(breaks=n$id.ctry,
                   labels=c("full",ctry.list)) +
  geom_point(aes(y=estimate), shape=19, size=2) +
  theme_half_open(12)
p2

# =============================================================
# Model (3)
# =============================================================

# Re-estimate full sample model

# Matching 
df$NACE <- ifelse(df$NACE=="",NA,df$NACE) # Recode empty strings as NA 
m <- matchit(foreign ~ NACE + BVD, data=df[is.na(df$NACE)==FALSE,], method="exact")
df.match <- match.data(m)

# Main model
m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match, weights=weights)
me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0",)["foreign1",])-1)*100 # robust SEs
main <- c(me,as.numeric(ci[1]),as.numeric(ci[2]), nobs(m))
main

# Set up estimation loop
m.dt <- find_data(m)
ctry.list <- sort(unique(m.dt$iso2))
OUT <- list()

for(i in (1:length(ctry.list))){
  
  # Identify firms that operate in a given country
  firm.list <- unique(m.dt$BVD[m.dt$iso2==ctry.list[i]])
  
  # Re-estimate model without firms from this country in the data
  m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=m.dt[!(m.dt$BVD %in% firm.list),], weights=weights)
  me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
  ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs
  n <- nobs(m)
  est.out <- c(ctry.list[i],me,ci[1],ci[2],n)
  print(paste(i, "of", length(ctry.list))) # simple counter
  OUT[[i]] <- est.out
}

t <- bind_rows(OUT)
n <- rbind(c("full",main),t)

colnames(n) <- c("ctry", "estimate", "ci_low", "ci_up", "obs")
n$estimate <- as.numeric(n$estimate)
n$ci_low <- as.numeric(n$ci_low)
n$ci_up <- as.numeric(n$ci_up)
n$obs <- as.numeric(n$obs)
print(n, n=nrow(n))
n$id.ctry <- seq(1,dim(n)[1],1)


p3 <- ggplot(n, aes(x=factor(id.ctry), y=estimate)) +
  labs(x="Samples", y="Estimate", title="Panel C: Model (3)") +
  geom_errorbar(aes(ymin=ci_low, ymax=ci_up), width=0, lwd=1) +
  geom_hline(yintercept=0, lty=2) + 
  scale_x_discrete(breaks=n$id.ctry,
                   labels=c("full",ctry.list)) +
  geom_point(aes(y=estimate), shape=19, size=2) +
  theme_half_open(12)
p3

# Arrange plots
plot.column <- align_plots(p1,p2,p3, align='hv', axis='l')

# Align and order plots for final plotting
fig_ctry <- plot_grid(plot.column[[1]], plot.column[[2]], plot.column[[3]], ncol=1, align='v', axis='l')

# Export to PDF
ggexport(fig_ctry, filename="plot_robust_ctry.pdf", width=8, height=6)


# =============================================================
# (2) Jackknife Re-sampling
# =============================================================

# Model (1)

# Re-estimate full sample model
m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df)
me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0",)["foreign1",])-1)*100 # robust SEs
main <- c(me,as.numeric(ci[1]),as.numeric(ci[2]))
main

# Set up estimation loop
m.dt <- find_data(m)
obs <- nobs(m)
OUT <- list()

for(i in (1:nobs(m))){
  
  # Re-estimate model deleting one observation at a time
  m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=m.dt[-c(i),])
  me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
  ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs
  est.out <- c(me,ci[1],ci[2])
  print(paste(i, "of", obs)) # simple counter
  OUT[[i]] <- est.out
}

n <- bind_rows(OUT) # depending on the machine this may take a little bit

colnames(n) <- c("estimate", "ci_low", "ci_up")
n$estimate <- as.numeric(n$estimate)
n$ci_low <- as.numeric(n$ci_low)
n$ci_up <- as.numeric(n$ci_up)


p1 <- ggplot(n, aes(x=estimate)) +
  geom_histogram(aes(x=estimate), bins=100) +
  geom_histogram(aes(x=ci_low), bins=100) +
  geom_histogram(aes(x=ci_up), bins=100) +
  labs(x="Estimates", y="", title="Panel A: Model (1)") +
  geom_vline(xintercept=main, lty=2) +
  theme_half_open(12)
p1


# Model (2)

# Re-estimate full sample model
m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df)
me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0",)["foreign1",])-1)*100 # robust SEs
main <- c(me,as.numeric(ci[1]),as.numeric(ci[2]))
main

# Set up estimation loop
m.dt <- find_data(m)
obs <- nobs(m)
OUT <- list()

for(i in (1:nobs(m))){
  
  # Re-estimate model deleting one observation at a time
  m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=m.dt[-c(i),])
  me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
  ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs
  est.out <- c(me,ci[1],ci[2])
  print(paste(i, "of", obs)) # simple counter
  OUT[[i]] <- est.out
}

n <- bind_rows(OUT) # depending on the machine this may take a little bit

colnames(n) <- c("estimate", "ci_low", "ci_up")
n$estimate <- as.numeric(n$estimate)
n$ci_low <- as.numeric(n$ci_low)
n$ci_up <- as.numeric(n$ci_up)


p2 <- ggplot(n, aes(x=estimate)) +
  geom_histogram(aes(x=estimate), bins=100) +
  geom_histogram(aes(x=ci_low), bins=100) +
  geom_histogram(aes(x=ci_up), bins=100) +
  labs(x="Estimates", y="", title="Panel B: Model (2)") +
  geom_vline(xintercept=main, lty=2) +
  theme_half_open(12)
p2

# Model (3)

# Re-estimate full sample model

# Matching 
df$NACE <- ifelse(df$NACE=="",NA,df$NACE) # Recode empty strings as NA 
m <- matchit(foreign ~ NACE + BVD, data=df[is.na(df$NACE)==FALSE,], method="exact")
df.match <- match.data(m)

# Re-estimate the main model
m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match, weights=weights)
me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0",)["foreign1",])-1)*100 # robust SEs
main <- c(me,as.numeric(ci[1]),as.numeric(ci[2]))
main


# Set up estimation loop
m.dt <- find_data(m)
obs <- nobs(m)
OUT <- list()

for(i in (1:nobs(m))){
  
  # Re-estimate model deleting one observation at a time
  m <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=m.dt[-c(i),], weights=weights)
  me <- (exp(summary(m)$coef[["foreign1","Estimate"]])-1)*100
  ci <- (exp(coefci(m, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs
  est.out <- c(me,ci[1],ci[2])
  print(paste(i, "of", obs)) # simple counter
  OUT[[i]] <- est.out
}

n <- bind_rows(OUT) # depending on the machine this may take a little bit

colnames(n) <- c("estimate", "ci_low", "ci_up")
n$estimate <- as.numeric(n$estimate)
n$ci_low <- as.numeric(n$ci_low)
n$ci_up <- as.numeric(n$ci_up)


p3 <- ggplot(n, aes(x=estimate)) +
  geom_histogram(aes(x=estimate), binwidth=.35) +
  geom_histogram(aes(x=ci_low), binwidth=.35) +
  geom_histogram(aes(x=ci_up), binwidth=.35) +
  labs(x="Estimates", y="", title="Panel C: Model (3)") +
  geom_vline(xintercept=main, lty=2) +
  theme_half_open(12)
p3

# Arrange plots
plot.column <- align_plots(p1,p2,p3, align='hv', axis='l')

# Align and order plots for final plotting
fig_jackknife <- plot_grid(plot.column[[1]], plot.column[[2]], plot.column[[3]], ncol=1, align='v', axis='l')

# Export to PDF
ggexport(fig_jackknife, filename="plot_robust_jackknife.pdf", width=8, height=6)



# =============================================================
#                       END OF CODE
# =============================================================

